* Folder locations, stored as global macros so they stay defined after this
* do-file finishes.
* $input is the folder the monthly SAS extracts (ap_YYYYMM.sas7bdat) are read from.
global input "Q:\dc1prhcmsas01\PU2\data - sas"
* $input2 and $input3 are defined here but not referenced in this script.
global input2 "Q:\dc1prhcmsas01\PU2\New folder"
global input3 "Q:\dc1prhcmsas01\PU2\temp"
* $temp holds the per-month intermediate .dta files written by the import loop.
global temp "Q:\dc1prhcmsas01\PU2\temp_stata_wr"
* $log is the folder the log file is written to.
global log "Q:\dc1prhcmsas01\PU2\Log - Stata WR"
* $output is the folder the final number_paychecks.dta is saved to.
global output "Q:\dc1prhcmsas01\PU2\data_stata_wr"

* Make the code folder the working directory.
cd "Q:\dc1prhcmsas01\PU2\Code - Stata WR"

* Close any log that is still open (capture ignores the error when none is),
* then start a fresh log, overwriting a previous one with the same name.
capture log close
log using "$log/1_number_paychecks_wr", replace


*Import
* For every month from 2013m1 through 2021m10: read the monthly SAS extract
* ap_YYYYMM.sas7bdat from $input, drop records with emp_stat_c=="T", recode the
* pay-frequency codes, collapse earnings to one row per
* yr_month x client_code x pay_freq_c, and save the result in $temp.
forvalues year=2013/2021 {
	forvalues month=1/12 {
		if `year'<=2020 | (`year'==2021 & `month'<=10) {
			* Extract file names use a two-digit month, so pad it once here
			* instead of trying two spellings and hiding the failure of one.
			local mm : display %02.0f `month'
			local sasfile "$input\ap_`year'`mm'.sas7bdat"

			* Stop right away, with a clear "file not found", if the extract is
			* missing; otherwise the error would only surface further down as
			* an unrelated "variable not found".
			confirm file "`sasfile'"

			* The client-code column is requested in upper case for 2013m1-m11
			* and for 2015m1-m10 except m4, and in lower case otherwise
			* (presumably the column's case differs across extracts - confirm).
			if (`year'==2015 & `month'<=10 & `month'!=4) | (`year'==2013 & `month'<=11) {
				local clientvar CLIENT_CODE
			}
			else {
				local clientvar client_code
			}

			* case(lower) makes every imported variable name lower case, so the
			* code below is the same for both spellings. No capture: an import
			* error must halt the script.
			import sas `clientvar' YR_MONTH ERN1_A EMP_STAT_C PAY_FREQ_C using "`sasfile'", case(lower) clear

			*Clean
			keep if emp_stat_c!="T"

			*Group by pay frequency
			* Code "2" is folded into "B"; codes "4" and "5" are folded into "W".
			replace pay_freq_c="B" if pay_freq_c=="2"
			replace pay_freq_c="W" if pay_freq_c=="4" | pay_freq_c=="5"

			* Mean and median of ern1_a within firm x month x pay frequency.
			* Full variable names are used so this also runs with varabbrev off.
			collapse (mean) mean=ern1_a (median) median=ern1_a, by(yr_month client_code pay_freq_c)
			gen year=`year'

			compress
			* The temp-file name keeps the unpadded month; the append and
			* delete loops later in this file look for exactly this name.
			save "$temp\temp_`year'`month'", replace
		}
	}
}


*Append
* Start from an empty dataset and stack the monthly temp files from $temp in
* chronological order (2013m1 through 2021m10).
clear
forvalues year=2013/2021 {
	* 2021 only has temp files through month 10; every earlier year has all 12.
	local lastmonth = cond(`year'==2021, 10, 12)
	forvalues month=1/`lastmonth' {
		append using "$temp\temp_`year'`month'"
	}
}

*Compute median pay within firm-year
* For each year x client_code x pay frequency, take the median across months of
* the monthly mean and of the monthly median earnings.
	bys year client_code pay_freq_c: gegen med_mean=median(mean)
	bys year client_code pay_freq_c: gegen med_med=median(median)
	
*Determine number of paychecks given deviation of pay from median
* A month counts one extra paycheck when its pay is at least 1.125x (pay
* frequency "W", base 4) or 1.25x (pay frequency "B"/"S", base 2) the
* firm-year median. Pay frequency "M" is always 1.
* Stata treats missing as larger than any number, so without the !missing()
* guard a month with no earnings data would satisfy ">=" and be given an extra
* paycheck. With the guard such months get the regular count.
	gen num_pay_mean=4+(mean>=1.125*med_mean & !missing(mean)) if pay_freq_c=="W"
	replace num_pay_mean=2+(mean>=1.25*med_mean & !missing(mean)) if pay_freq_c=="B" | pay_freq_c=="S"
	replace num_pay_mean=1 if pay_freq_c=="M"
	
	gen num_pay_med=4+(median>=1.125*med_med & !missing(median)) if pay_freq_c=="W"
	replace num_pay_med=2+(median>=1.25*med_med & !missing(median)) if pay_freq_c=="B" | pay_freq_c=="S"
	replace num_pay_med=1 if pay_freq_c=="M"
	
* Keep only the identifiers and the two paycheck counts.
	keep client_code yr_month pay_freq_c num_pay_mean num_pay_med
 
save "$output/number_paychecks.dta", replace

*Delete temp files
* Remove every monthly temp file (2013m1 through 2021m10) from $temp.
forvalues year=2013/2021 {
	* 2021 stops at month 10; all other years run through month 12.
	local lastmonth = cond(`year'==2021, 10, 12)
	forvalues month=1/`lastmonth' {
		erase "$temp\temp_`year'`month'.dta"
	}
}

* Close the log opened at the top of this do-file.
log close